library(tidyverse)
library(bayesrules)
library(bayesplot)
library(rstan)
library(grid)
library(gridExtra)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── ✔ dplyr 1.1.2 ✔ readr 2.1.4 ✔ forcats 1.0.0 ✔ stringr 1.5.0 ✔ ggplot2 3.4.2 ✔ tibble 3.2.1 ✔ lubridate 1.9.2 ✔ tidyr 1.3.0 ✔ purrr 1.0.1 ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::filter() masks stats::filter() ✖ dplyr::lag() masks stats::lag() ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors This is bayesplot version 1.10.0 - Online documentation and vignettes at mc-stan.org/bayesplot - bayesplot theme set to bayesplot::theme_default() * Does _not_ affect other ggplot2 plots * See ?bayesplot_theme_set for details on theme setting Loading required package: StanHeaders rstan (Version 2.21.8, GitRev: 2e1f913d3ca3) For execution on a local, multicore CPU with excess RAM we recommend calling options(mc.cores = parallel::detectCores()). To avoid recompilation of unchanged Stan programs, we recommend calling rstan_options(auto_write = TRUE) Attaching package: ‘rstan’ The following object is masked from ‘package:tidyr’: extract Attaching package: ‘gridExtra’ The following object is masked from ‘package:dplyr’: combine
lambda = 4.6
set.seed(84735)
rnorm( 1, mean=lambda, sd=2 )
lambda = 2.1
set.seed(84735)
rnorm( 1, mean=lambda, sd=7 )
lambda = 8.9
set.seed(84735)
runif(1, min=lambda-2, max=lambda+2)
lambda = 1.2
set.seed(84735)
runif(1, min=lambda-0.5, max=lambda+0.5)
lambda = 7.7
set.seed(84735)
runif(1, min=lambda-3, max=lambda+3)
lambda = 2
lambda2 = 2.1
Plot of unnormalized posterior:
options(repr.plot.width=15, repr.plot.height=4)
x <- seq(-1,1,0.01)
plot(x, x^(-2), type="l", col="red")
Acceptance rate:
lambda^2 / lambda2^2
Plot of unnormalized posterior:
x <- seq(-10,10,0.01)
plot(x, exp(x), type="l", col="red")
Acceptance rate:
exp(lambda2) / exp(lambda)
Plot of unnormalized posterior:
x <- seq(-10,10,0.01)
plot(x, exp(-10*x), type="l", col="red")
Acceptance rate:
exp(-10*lambda2) / exp(-10*lambda)
Plot of unnormalized posterior:
x <- seq(-10,10,0.01)
plot(x, exp(-x^4), type="l", col="red")
Acceptance rate:
exp(-lambda2^4) / exp(-lambda^4) * (lambda2/lambda)
lambda = 1.8
lambda2 = 1.6
Plot of unnormalized posterior:
options(repr.plot.width=15, repr.plot.height=4)
x <- seq(-1,1,0.01)
plot(x, x^(-1), type="l", col="red")
Acceptance rate:
lambda/lambda2
Plot of unnormalized posterior:
x <- seq(-10,10,0.01)
plot(x, exp(3*x), type="l", col="red")
Acceptance rate:
exp(3*lambda2) / exp(3*lambda)
Plot of unnormalized posterior:
x <- seq(-10,10,0.01)
plot(x, exp(-1.9*x), type="l", col="red")
Acceptance rate:
exp(-1.9*lambda2) / exp(-1.9*lambda)
Plot of unnormalized posterior:
x <- seq(-3,3,0.1)
plot(x, exp(-x^4), type="l", col="red")
Acceptance rate:
exp(-lambda2^4) / exp(-lambda^4) * (lambda2/lambda)
one_mh_iteration <- function(w, current){
# STEP 1: Propose the next chain location
proposal <- runif(1, min = current - w, max = current + w)
# STEP 2: Decide whether or not to go there
proposal_plaus <- dnorm(proposal, 0, 1) * dnorm(6.25, proposal, 0.75)
current_plaus <- dnorm(current, 0, 1) * dnorm(6.25, current, 0.75)
alpha <- min(1, proposal_plaus / current_plaus)
next_stop <- sample(c(proposal, current), size = 1, prob = c(alpha, 1-alpha))
# Return the results
return(data.frame(proposal, alpha, next_stop))
}
set.seed(1)
one_mh_iteration( w=0.01, current=3 )
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 2.99531 | 0.987027 | 2.99531 |
The acceptance probability $\alpha=0.987$ is extremely high. The proposed jump was accepted. Since $w$ is very small, the new location is extremely close to the old location.
set.seed(1)
one_mh_iteration( w=0.5, current=3 )
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 2.765509 | 0.483002 | 3 |
With the wider $w$, a location a bit further away from $3$ was proposed, leading to an acceptance probability of $\alpha=0.483$. The algorithm has rejected the jump.
set.seed(1)
one_mh_iteration( w=1, current=3 )
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 2.531017 | 0.200246 | 3 |
The proposed location is now even further away, with a lower acceptance probability of $\alpha=0.2$. The algorithm has rejected the jump.
set.seed(1)
one_mh_iteration( w=3, current=3 )
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 1.593052 | 0.001284355 | 3 |
The proposed location is even more away, now with an extremely small acceptance probability of $\alpha=0.001$.
It appears that $w=0.01$ is too small (leading to a too high acceptance rate) and that $w>1$ is too large (leading to a too low acceptance rate. Values around $w \sim 0.5$ seem to be reasonable.
From the book:
mh_tour <- function(N, w){
# 1. Start the chain at location 3
current <- 3
# 2. Initialize the simulation
mu <- rep(0, N)
# 3. Simulate N Markov chain stops
for(i in 1:N){
# Simulate one iteration
sim <- one_mh_iteration(w = w, current = current)
# Record next location
mu[i] <- sim$next_stop
# Reset the current location
current <- sim$next_stop
}
# 4. Return the chain locations
return(data.frame(iteration = c(1:N), mu))
}
True posterior:
summarize_normal_normal( mean=0, sd=1, sigma=0.75, y_bar=6.25, n=1 )
| model | mean | mode | var | sd |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| prior | 0 | 0 | 1.00 | 1.0 |
| posterior | 4 | 4 | 0.36 | 0.6 |
plot_normal_normal( mean=0, sd=1, sigma=0.75, y_bar=6.25, n=1 )
res_a = mh_tour( N=50, w=50 )
plot_chain <- function( df, title="" ) {
par(mfrow=c(2,2))
p1 <- ggplot( df, aes(x=iteration, y=mu) ) + geom_line() + geom_point()
p2 <- ggplot( df, aes(x=mu) ) +
geom_histogram( aes(y=..density..), bins=30, color="white" ) +
stat_function( fun=dnorm, args=list(4, 0.6), color="blue" )
grid.arrange(p1, p2, ncol=2, top=textGrob(title))
}
plot_chain(res_a)
Warning message:
“The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(density)` instead.”
$w$ appears to be too large, almost no jumps are accepted. Consequently, only different small parts of the distribution are explored.
res_b = mh_tour( N=50, w=0.01 )
plot_chain(res_b)
The values in the chain are highly correlated - the chosen value for $w$ appears to be too small. Consequently, only a single small part of the distribution is explored.
res_c = mh_tour( N=1000, w=50 )
plot_chain(res_c)
Same as in a), now with more iterations. A bit more of the distribution is explored.
res_c = mh_tour( N=1000, w=0.01 )
plot_chain(res_c)
Same as in b), now with more iterations. A bit more of the patch in the distribution is explored, however not considerably more!
In a), $w$ is extremely large and the proposals jump usually far away from the mode of the posterior distribution; consequently, almost all jumps are rejected. This results in mostly constant values for $\mu$ with very few jumps. In b), $w$ is extremely small and a large amount of the proposed jumps is accepted (usually the posterior value is higher on one side). However since the jumps are only very small, only a little part of the total posterior distribution is explored.
If $w$ is too large, the acceptance probability is small. However, if longer chains are run, with time also a considerable amount of reasonably good samples will be available, scaling linearly with $N$. Here for $N=10'000$ steps:
res = mh_tour( N=10000, w=50 )
plot_chain(res)
Still, this is not comparable to a reasonable choice for $w$:
res = mh_tour( N=1000, w=1 )
plot_chain(res)
Adding more iterations for small $w$ does not help that much:
res = mh_tour( N=10000, w=0.01 )
plot_chain(res)
This is essentially a random walk and it is well known that a random walk with $N$ steps typically only covers a distance of $\sqrt{N}$ steps. So it is expected, that the chain only gets $\sqrt{N} \times w$ closer to the mode of the posterior distribution:
sqrt(10000) * 0.01
This is more or less in line with the above plot.
one_mh_iteration_normal <- function(s, current){
# STEP 1: Propose the next chain location
proposal <- rnorm(1, mean=current, sd=s)
# STEP 2: Decide whether or not to go there
proposal_plaus <- dnorm(proposal, 0, 1) * dnorm(6.25, proposal, 0.75)
current_plaus <- dnorm(current, 0, 1) * dnorm(6.25, current, 0.75)
alpha <- min(1, proposal_plaus / current_plaus)
next_stop <- sample(c(proposal, current), size = 1, prob = c(alpha, 1-alpha))
# Return the results
return(data.frame(proposal, alpha, next_stop))
}
set.seed(1)
one_mh_iteration_normal(s=0.01, current=3)
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 2.993735 | 0.9826955 | 2.993735 |
$s$ is probably too small - small distance covered, high acceptance rates (rerun without fixed seed).
set.seed(1)
one_mh_iteration_normal(s=0.5, current=3)
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 2.686773 | 0.3655544 | 3 |
$s$ might be just right (rerun without fixed seed).
set.seed(1)
one_mh_iteration_normal(s=1, current=3)
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 2.373546 | 0.1017526 | 3 |
$s$ might be just right (varying acceptance rates).
set.seed(1)
one_mh_iteration_normal(s=3, current=3)
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 1.120639 | 4.002513e-05 | 3 |
Extremely small acceptance rates - $s$ might now be too large.
mh_tour_normal <- function(N, s){
# 1. Start the chain at location 3
current <- 3
# 2. Initialize the simulation
mu <- rep(0, N)
# 3. Simulate N Markov chain stops
for(i in 1:N){
# Simulate one iteration
sim <- one_mh_iteration_normal(s = s, current = current)
# Record next location
mu[i] <- sim$next_stop
# Reset the current location
current <- sim$next_stop
}
# 4. Return the chain locations
return(data.frame(iteration = c(1:N), mu))
}
set.seed(84735)
res_a = mh_tour_normal( N=20, s=0.01 )
plot_chain( res_a )
set.seed(84735)
res_b = mh_tour_normal( N=20, s=10 )
plot_chain( res_b )
set.seed(84735)
res_c = mh_tour_normal( N=1000, s=0.01 )
plot_chain( res_c )
set.seed(84735)
res_c = mh_tour_normal( N=1000, s=10 )
plot_chain( res_c )
Similarly to exercise 7.11, $s=0.01$ is too small (producing a highly correlated chain that only explores a small part of the posterior distribution) and $s=10$ is too large (leading to a very low acceptance rate and exploring the posterior only at a very sparse number of points). The results for a higher number of iterations is qualitatively different to exercise 7.11. Using more iterations helps a bit for too large values of $s$, but only provides very minor improvements for too small values of $s$.
Grid search:
for( s in c(0.01,0.03,0.1,0.3,1,3,10) ) {
res = mh_tour_normal( N=1000, s=s )
plot_chain( res, title=paste0("s = ", s, collapse = "") )
}
A value around $s=0.3$ seems reasonable. Explore this part of the grid a bit:
for( s in c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9) ) {
res = mh_tour_normal( N=1000, s=s )
plot_chain( res, title=paste0("s = ", s, collapse = "") )
}
Most approximations with $s>0.3$ look more or less reasonable for the limit number of iterations.
new_mh_iteration <- function(w, current, m, s){
# STEP 1: Propose the next chain location
proposal <- runif(1, min = current - w, max = current + w)
# STEP 2: Decide whether or not to go there
proposal_plaus <- dnorm(proposal, m, s) * dnorm(6.25, proposal, 0.75)
current_plaus <- dnorm(current, m, s) * dnorm(6.25, current, 0.75)
alpha <- min(1, proposal_plaus / current_plaus)
next_stop <- sample(c(proposal, current), size = 1, prob = c(alpha, 1-alpha))
# Return the results
return(data.frame(proposal, alpha, next_stop))
}
set.seed(84735)
new_mh_iteration(w=1, current=3, m=0, s=10)
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 3.495377 | 1 | 3.495377 |
options(repr.plot.width=15, repr.plot.height=3)
plot_normal_normal(mean=0, sd=10, n=1, y_bar=6.25, sigma=0.75)
summarize_normal_normal(mean=0, sd=10, n=1, y_bar=6.25, sigma=0.75)
| model | mean | mode | var | sd |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| prior | 0.00000 | 0.00000 | 100.0000000 | 10.0000000 |
| posterior | 6.21504 | 6.21504 | 0.5593536 | 0.7478995 |
The proposed value is closer to the posterior mode and consequently accepted.
set.seed(84735)
new_mh_iteration(w=1, current=3, m=20, s=1)
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 3.495377 | 1 | 3.495377 |
plot_normal_normal(mean=20, sd=1, n=1, y_bar=6.25, sigma=0.75)
summarize_normal_normal(mean=20, sd=1, n=1, y_bar=6.25, sigma=0.75)
| model | mean | mode | var | sd |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| prior | 20.0 | 20.0 | 1.00 | 1.0 |
| posterior | 11.2 | 11.2 | 0.36 | 0.6 |
The (same) proposed value is still closer to the posterior mode (even if it's a bit further away now) and consequently it is accepted.
set.seed(84735)
new_mh_iteration(w=0.1, current=3, m=20, s=1)
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 3.049538 | 1 | 3.049538 |
plot_normal_normal(mean=20, sd=1, n=1, y_bar=6.25, sigma=0.75)
summarize_normal_normal(mean=20, sd=1, n=1, y_bar=6.25, sigma=0.75)
| model | mean | mode | var | sd |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| prior | 20.0 | 20.0 | 1.00 | 1.0 |
| posterior | 11.2 | 11.2 | 0.36 | 0.6 |
Similar to c), the sampled value is closer to the posterior mode and accepted. However it will take much longer to reach the posterior mode, since $w$ is 10 times smaller.
set.seed(84735)
new_mh_iteration(w=0.1, current=3, m=-15, s=10)
| proposal | alpha | next_stop |
|---|---|---|
| <dbl> | <dbl> | <dbl> |
| 3.049538 | 1 | 3.049538 |
plot_normal_normal(mean=-15, sd=10, n=1, y_bar=6.25, sigma=0.75)
summarize_normal_normal(mean=-15, sd=10, n=1, y_bar=6.25, sigma=0.75)
| model | mean | mode | var | sd |
|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| prior | -15.000000 | -15.000000 | 100.0000000 | 10.0000000 |
| posterior | 6.131137 | 6.131137 | 0.5593536 | 0.7478995 |
The sampled value is closer to the posterior mode and, consequently, also accepted.
The priors with $s=10$ are extremely vague and the posterior is dominated by the measured data. The priors with $s=1$ are a bit more informative and the balancing of the posterior between prior and likelihood is better visible.
$\lambda \sim \text{Gamma}(1, 0.1)$
$\lambda|Y \sim \text{Pois}(\lambda)$
Since $\lambda>0$ can be arbitrary large, sampling from a Beta distribution (values between 0-1) or sampling from a normal distribution (non-zero probabilities for negative values) is not an option. Consequently, an exponential distribution with PDF
$$ p(\lambda) = \alpha\, e^{-\alpha x} $$is used, which samples any value of $\lambda > 0$. The choice of $\alpha$ will be discussed in b).
Closed-form posterior:
summarize_gamma_poisson( shape=1, rate=0.1, sum_y=4, n=1 )
| model | shape | rate | mean | mode | var | sd |
|---|---|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> |
| prior | 1 | 0.1 | 10.000000 | 0.000000 | 100.000000 | 10.000000 |
| posterior | 5 | 1.1 | 4.545455 | 3.636364 | 4.132231 | 2.032789 |
Implement Metropolis-Hastings:
gp_mh_iteration <- function(alpha, current){
# STEP 1: Propose the next chain location
proposal <- rexp(1, rate=alpha)
# STEP 2: Decide whether or not to go there
proposal_plaus <- dgamma(proposal, 1, 0.1) * dpois(4, proposal)
current_plaus <- dgamma(current, 1, 0.1) * dpois(4, current)
alpha <- min(1, proposal_plaus / current_plaus)
next_stop <- sample(c(proposal, current), size = 1, prob = c(alpha, 1-alpha))
# Return the results
return(data.frame(proposal, alpha, next_stop))
}
mh_tour_gp <- function(N, alpha){
# 1. Start the chain at location 3
current <- 3
# 2. Initialize the simulation
mu <- rep(0, N)
# 3. Simulate N Markov chain stops
for(i in 1:N){
# Simulate one iteration
sim <- gp_mh_iteration(alpha = alpha, current = current)
# Record next location
mu[i] <- sim$next_stop
# Reset the current location
current <- sim$next_stop
}
# 4. Return the chain locations
return(data.frame(iteration = c(1:N), mu))
}
Compute and visualize chains:
plot_chain <- function( df, title="" ) {
par(mfrow=c(2,2))
p1 <- ggplot( df, aes(x=iteration, y=mu) ) + geom_line() + geom_point()
p2 <- ggplot( df, aes(x=mu) ) +
geom_histogram( aes(y=..density..), bins=30, color="white" ) +
stat_function( fun=dgamma, args=list(5, 1.1), color="blue" )
grid.arrange(p1, p2, ncol=2, top=textGrob(title))
}
for( alpha in c(0.01,0.03,0.1,0.3,1,3) ) {
res <- mh_tour_gp( 1000, alpha )
plot_chain( res, title=paste0("alpha = ", alpha, collapse="") )
}
Reasonable values seem to be around $\alpha=0.1$. With a bit more iterations:
res <- mh_tour_gp( 10000, alpha=0.1 )
plot_chain( res, title=paste0("alpha = ", 0.1, collapse="") )